# study area
# studyarea <- st_read("data/StudyArea.shp") %>%
studyarea <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp") %>%
st_transform('EPSG:2272')
## Reading layer `StudyArea' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8367662 ymin: 4859107 xmax: -8365440 ymax: 4860628
## Projected CRS: WGS 84 / Pseudo-Mercator
# Philly block groups
# blockgroups <- st_read("data/Philly_blockgroup.shp") %>%
blockgroups <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
st_transform('EPSG:2272')
## Reading layer `Philly_blockgroup' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1338 features and 18 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8380161 ymin: 4846701 xmax: -8344037 ymax: 4886015
## Projected CRS: WGS 84 / Pseudo-Mercator
# philly bounds
philly_bounds <- st_union(blockgroups)
# philly hydrology (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
## Reading layer `OGRGeoJSON' from data source
## `https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'
## using driver `GeoJSON'
## Simple feature collection with 7970 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -76.18462 ymin: 38.7667 xmax: -74.71871 ymax: 40.66593
## Geodetic CRS: WGS 84
# highway
# stateroads_inphilly <- st_read("data/PaStateRoads2024_03.geojson") %>%
# st_transform('EPSG:2272') %>%
# st_intersection(philly_bounds)
# write out stateroads_inphilly as its own file since it's big
# st_write(stateroads_inphilly,
# "data/PhillyStateRoads.shp")
# read in pre-filte#1a9988 stateroads data
stateroads_inphilly <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp")
## Reading layer `PhillyStateRoads' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2315 features and 105 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 2660586 ymin: 207551.4 xmax: 2749290 ymax: 304306.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
philly_mainhighways <- stateroads_inphilly %>%
filter(TRAF_RT_NO %in% c("I", "US"),
ST_RT_NO %in% c("0001", "0095", "0076", "0676")) %>%
dplyr::select(STREET_NAM, ST_RT_NO, TRAF_RT_NO, TRAF_RT__1)
# save and only use the highways of interest
# for now, interested in comparing highways that cut through neighborhoods vs those that don't
# ACS
# philly property data (https://opendataphilly.org/datasets/philadelphia-properties-and-assessment-history/) <-- that was the original source, but now i'm using the same downloaded file that Luming and Sijia have been working from
philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson") %>%
st_transform(crs = 'EPSG:2272')
## Reading layer `opa_properties_public' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 584002 features and 78 fields (with 171 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.27439 ymin: 39.87513 xmax: -74.95819 ymax: 40.1377
## Geodetic CRS: WGS 84
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
st_transform(crs = 'EPSG:2272')
## Reading layer `PPR_Properties' from data source
## `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 507 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13186
## Geodetic CRS: WGS 84
# philly schools
school <-
st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `Schools' from data source
## `https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 495 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.2665 ymin: 39.90781 xmax: -74.97057 ymax: 40.12974
## Geodetic CRS: WGS 84
# city facilities (https://opendataphilly.org/datasets/city-facilities-master-facilities-database/)
facilities <- st_read("https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson") %>%
st_transform('EPSG:2272') %>%
filter(STATUS == "A") # remove inactive sites
## Reading layer `City_Facilities_pub' from data source
## `https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 3197 features and 32 fields (with 5 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.28169 ymin: 39.86686 xmax: -74.96455 ymax: 40.13108
## Geodetic CRS: WGS 84
# libraries (from facilities data)
libraries <- facilities %>%
filter(ASSET_GROUP1 == "A8")
# produce (LPSS, HPSS -- indicators for produce; low or high produce supply stores)
retail_produce <- st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `370eb703-5a4f-4abb-8920-727cef31373b2020329-1-rcimn.5n39o' from data source `https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1336 features and 16 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
## Geodetic CRS: WGS 84
# checking out what Luming_property.csv looks like
luming_property <- read.csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Luming_property.csv")
# looking at which properties were saved in luming_property -- Luming and Sijia's properties files are limited to the case study area
# luming_properties <- philly_properties %>%
# filter(objectid %in% luming_property$objectid)
# checking out what property_sijia_eda.geojson looks like
sijia_eda <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson")
## Reading layer `property_sijia_eda' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson'
## using driver `GeoJSON'
## Simple feature collection with 2394 features and 98 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2692333 ymin: 236562 xmax: 2697665 ymax: 240121.5
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# interstate highways in philly
ggplot() +
geom_sf(data = philly_bounds) +
geom_sf(data = stateroads_inphilly %>%
filter(TRAF_RT_NO == "I"))
# US highways in philly
ggplot() +
geom_sf(data = philly_bounds) +
geom_sf(data = stateroads_inphilly %>%
filter(TRAF_RT_NO == "US"))
# PA state highways in philly
ggplot() +
geom_sf(data = philly_bounds) +
geom_sf(data = stateroads_inphilly %>%
filter(TRAF_RT_NO == "PA"))
# all other roads in philly
ggplot() +
geom_sf(data = philly_bounds) +
geom_sf(data = stateroads_inphilly %>%
filter(!(TRAF_RT_NO %in% c("I", "US", "PA"))))
# I-95, US-1, I-676, and I-76
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624", color = "#57470a") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#7fe3d6",
"0676" = "#eba075")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways") +
theme_void()
# looking at study area within philly bounds
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = studyarea, fill = "coral")
# philly parks
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent")
# philly schools
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = school, color = "brown4") # point data
# libraries
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = libraries, color = "#eb5600") # point data
# transit / looking at what this category actually consists of....
# looks like there's point geography for parking lots, some random transit stops, and heli pad station
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = facilities %>%
filter(ASSET_GROUP1 == "A17",
grepl("Parking", ASSET_SUBT1_DESC)), color = "pink") # point data
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = sijia_eda, color = "#7fe3d6")
# prison facilities
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = facilities %>%
filter(ASSET_GROUP2 == "A13.3"), color = "lightgrey") # point data
# all philly properties
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_properties, color = "black")
For all Philly properties, calculate the distance from highway and see how it relates to price.
# taking Luming's distance calculator function
calculate_nearest_distance <- function(set_points, other_layer) {
nearest_idx <- st_nearest_feature(set_points, other_layer)
st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}
The following code chunk took too much to load (too many properties to consider), so I’m going to limit it to the properties within 500m (and possibly look at 1000m and 1500m) from highways to look at the immediate effects of highways on property prices.
# only keep properties that are within 500m (1640.42 ft) of highways of interest
buff500 <- 500 * 3.28084
highways_500buffer <- st_union(st_buffer(philly_mainhighways, buff500))
properties500 <- philly_properties[highways_500buffer,]
# only keep properties that are within 1000m (3280.84 ft) of highways of interest
buff1000 <- 1000 * 3.28084
highways_1000buffer <- st_union(st_buffer(philly_mainhighways, buff1000))
properties1000 <- philly_properties[highways_1000buffer,]
# only keep properties that are within 1500m (4921.26 ft) of highways of interest
buff1500 <- 1500 * 3.28084
highways_1500buffer <- st_union(st_buffer(philly_mainhighways, buff1500))
properties1500 <- philly_properties[highways_1500buffer,]
# visualizing buffer
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624", color = "#57470a") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#7fe3d6",
"0676" = "#eba075")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
# visualizing properties to make sure we got the right ones
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624", color = "#57470a") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#7fe3d6",
"0676" = "#eba075")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624", color = "#57470a") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_1000buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties1000,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#7fe3d6",
"0676" = "#eba075")) +
labs(title = "Properties within 1000m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#b39624", color = "#57470a") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_1500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties1500,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#7fe3d6",
"0676" = "#eba075")) +
labs(title = "Properties within 1500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
Are there differences between how price of property relates to dist_to_closest_highway depending on type of highway (whether highway cuts through neighborhood or not)?
# starting with 500m buffer
# need to check if there are 0 values in sale_price before log transforming
summary(properties500$sale_price)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 10 89900 327917 230000 342000000 840
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0 10 89900 327917 230000 342000000 840
# only keep properties whose sale_price >= $10,000
# according to #1a9988fin, the most expensive house in philadelphia right now is $25,000,000 -- using that as a cutoff
properties500_clean <- properties500 %>%
filter(sale_price >= 10000,
sale_price <= 25000000,
total_area >= 100,
total_livable_area >= 100) %>%
mutate(price_perTLA = sale_price / total_livable_area,
price_perTA = sale_price / total_area,
log_price = log(sale_price),
log_price_perTLA = log(price_perTLA),
log_price_perTA = log(price_perTA),
norm_log_price = scale(log_price),
norm_log_price_perTLA = scale(log_price_perTLA),
norm_log_price_perTA = scale(log_price_perTA))
summary(properties500_clean$sale_price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10000 62000 130000 276100 245000 24973920
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 10000 70000 153000 329289 300000 24973920
summary(properties500_clean$price_perTLA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.086 47.203 96.988 144.989 175.308 11527.500
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0.0865 54.8221 117.1875 Inf 231.4815 Inf 1741
summary(properties500_clean$price_perTA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.039 37.313 73.077 172.586 161.290 22671.569
# correlations between total_area and total_livable_area, and sale_price
# cor(properties500_clean$total_area, properties500_clean$total_livable_area, use = "pairwise.complete.obs") # 0.3501135 -- why is this so low?
# cor(properties500_clean$total_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.2933512
# cor(properties500_clean$total_livable_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.3417108
# histogram of sale_price
ggplot(data = properties500_clean) +
geom_histogram(aes(x = sale_price),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTA),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTLA),
bins = 100)
# map of properties within 500 m of highway, colo#1a9988 by sale_price
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = sale_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Sale Price of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
# map of properties within 500 m of highway, colo#1a9988 by sale_price
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = norm_log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Scaled Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
Calculate each property’s distance to highways.
# calculate each property's distance to highways of interest
# distance is in feet
start_time <- Sys.time()
prop500 <- properties500_clean %>%
select(matches("assessment_date|building_code|category_code|census_tract|geographic|zip|market_value|parcel|sale_|taxable|_area|year_built|objectid|geometry|price")) %>%
mutate(dist_to_US001 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0001")),
dist_to_I076 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0076")),
dist_to_I095 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0095")),
dist_to_I676 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0676")),
dist_to_US001m = dist_to_US001 * 0.3048,
dist_to_I076m = dist_to_I076 * 0.3048,
dist_to_I095m = dist_to_I095 * 0.3048,
dist_to_I676m = dist_to_I676 * 0.3048) # Time difference of 55.20697 secs
# find closest highway
prop500_closest <- prop500 %>%
select(matches("objectid|dist_to")) %>%
pivot_longer(cols = 2:5,
names_to = "highway",
values_to = "distance") %>%
group_by(objectid) %>%
summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
ungroup() # Time difference of 1.736099 mins
# join closest highway
prop500 <- left_join(prop500,
prop500_closest %>% st_drop_geometry(),
by = "objectid") %>%
mutate(closest_highway = case_when(
dist_to_closest_highway == dist_to_US001 ~ "US-1",
dist_to_closest_highway == dist_to_I076 ~ "I-76",
dist_to_closest_highway == dist_to_I095 ~ "I-95",
dist_to_closest_highway == dist_to_I676 ~ "I-676",
.default = NA),
highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
closest_highway %in% c("I-95","I-76") ~ "along waterway",
.default = NA))
# save file
# st_write(prop500, "/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dist2highway.geojson")
# read in pre-saved file
# prop500_new <- st_read("/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/data/philly_prop500_dist2highway.geojson")
Now, look at relationship between distance to highway and price.
# map of properties within 500m of highway colo#1a9988 by which highway they're closest to
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#96dbe3", color = "transparent") +
geom_sf(data = philly_parks, fill = "darkgreen", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = prop500,
aes(color = closest_highway), size = 0.3) +
geom_sf(data = philly_mainhighways,
color = "black") +
# add custom colors
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676") +
theme_void()
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highway, y = sale_price,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
labs(title = "Property Price as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highway, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
# simple regression of price (log_price_perTA) and dist2closesthighway
fit1 <- lm(log_price_perTA ~ dist_to_closest_highway, data = prop500)
summary(fit1)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highway, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6232 -0.7750 -0.1020 0.6889 5.6123
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.427e+00 1.198e-02 369.595 <2e-16 ***
## dist_to_closest_highway -3.570e-05 1.176e-05 -3.035 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.164 on 51946 degrees of freedom
## Multiple R-squared: 0.0001773, Adjusted R-squared: 0.0001581
## F-statistic: 9.213 on 1 and 51946 DF, p-value: 0.002405
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types)
fit2 <- lm(log_price_perTA ~ dist_to_closest_highway + closest_highway, data = prop500)
summary(fit2)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highway + closest_highway,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0973 -0.6535 0.0601 0.6854 4.9569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.585e+00 3.522e-02 158.558 < 2e-16 ***
## dist_to_closest_highway -6.911e-05 1.076e-05 -6.421 1.37e-10 ***
## closest_highwayI-76 -6.248e-01 3.862e-02 -16.177 < 2e-16 ***
## closest_highwayI-95 -6.464e-01 3.510e-02 -18.414 < 2e-16 ***
## closest_highwayUS-1 -1.582e+00 3.488e-02 -45.352 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.056 on 51943 degrees of freedom
## Multiple R-squared: 0.1763, Adjusted R-squared: 0.1762
## F-statistic: 2778 on 4 and 51943 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types) with interaction
fit3 <- lm(log_price_perTA ~ dist_to_closest_highway + closest_highway + dist_to_closest_highway*closest_highway, data = prop500)
summary(fit3)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highway + closest_highway +
## dist_to_closest_highway * closest_highway, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0474 -0.6491 0.0599 0.6785 4.9409
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 5.516e+00 7.081e-02 77.894
## dist_to_closest_highway 2.188e-05 8.144e-05 0.269
## closest_highwayI-76 -1.182e+00 8.906e-02 -13.277
## closest_highwayI-95 -3.554e-01 7.288e-02 -4.877
## closest_highwayUS-1 -1.623e+00 7.233e-02 -22.439
## dist_to_closest_highway:closest_highwayI-76 4.753e-04 9.363e-05 5.077
## dist_to_closest_highway:closest_highwayI-95 -3.329e-04 8.319e-05 -4.001
## dist_to_closest_highway:closest_highwayUS-1 3.126e-05 8.276e-05 0.378
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dist_to_closest_highway 0.788
## closest_highwayI-76 < 2e-16 ***
## closest_highwayI-95 1.08e-06 ***
## closest_highwayUS-1 < 2e-16 ***
## dist_to_closest_highway:closest_highwayI-76 3.86e-07 ***
## dist_to_closest_highway:closest_highwayI-95 6.31e-05 ***
## dist_to_closest_highway:closest_highwayUS-1 0.706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.052 on 51940 degrees of freedom
## Multiple R-squared: 0.1829, Adjusted R-squared: 0.1828
## F-statistic: 1661 on 7 and 51940 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types)
fit4 <- lm(log_price_perTA ~ dist_to_closest_highway + highway_type, data = prop500)
summary(fit4)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highway + highway_type,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0972 -0.6682 0.0421 0.6783 5.9801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.961e+00 1.248e-02 397.613 < 2e-16 ***
## dist_to_closest_highway -8.915e-05 1.090e-05 -8.179 2.93e-16 ***
## highway_typethrough neighborhood -8.866e-01 9.506e-03 -93.272 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.077 on 51945 degrees of freedom
## Multiple R-squared: 0.1436, Adjusted R-squared: 0.1436
## F-statistic: 4355 on 2 and 51945 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit5 <- lm(log_price_perTA ~ dist_to_closest_highway + highway_type + dist_to_closest_highway*highway_type, data = prop500)
summary(fit5)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highway + highway_type +
## dist_to_closest_highway * highway_type, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0760 -0.6674 0.0393 0.6698 6.0426
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 5.074e+00 1.675e-02
## dist_to_closest_highway -2.086e-04 1.607e-05
## highway_typethrough neighborhood -1.091e+00 2.234e-02
## dist_to_closest_highway:highway_typethrough neighborhood 2.209e-04 2.185e-05
## t value Pr(>|t|)
## (Intercept) 302.98 <2e-16 ***
## dist_to_closest_highway -12.98 <2e-16 ***
## highway_typethrough neighborhood -48.84 <2e-16 ***
## dist_to_closest_highway:highway_typethrough neighborhood 10.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.076 on 51944 degrees of freedom
## Multiple R-squared: 0.1453, Adjusted R-squared: 0.1452
## F-statistic: 2943 on 3 and 51944 DF, p-value: < 2.2e-16
Visualizing the highway types (cutting through neighborhoods vs running alongside water).
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highway, y = sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price and dist2closesthighway with loess method
ggplot(data = prop500,
aes(x = dist_to_closest_highway, y = sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "loess", se = FALSE, size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highway, y = log_price_perTA,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# correlations
prop500 %>%
st_drop_geometry() %>%
group_by(highway_type) %>%
summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
## # A tibble: 2 × 2
## highway_type correlation
## <chr> <dbl>
## 1 along waterway -0.0713
## 2 through neighborhood 0.00600
Limiting the properties further to those under $1mil
prop500_under1m <- prop500 %>%
filter(sale_price < 1e6)
ggplot(data = prop500_under1m,
aes(x = dist_to_closest_highway, y = sale_price,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .1, size = 0.3) +
geom_smooth(method = "loess", se = FALSE, size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
labs(title = "Property Price (under $1M) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
# correlation of price and dist to highway for each highway
cor <- prop500_under1m %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
cor
## # A tibble: 4 × 2
## closest_highway correlation
## <chr> <dbl>
## 1 I-676 0.0849
## 2 I-76 0.120
## 3 I-95 -0.108
## 4 US-1 0.0382
# correlation of price and dist to closest highway
prop500_under1m %>%
st_drop_geometry() %>%
summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
## correlation
## 1 -0.01032769
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500_under1m,
aes(x = dist_to_closest_highway, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .1, size = 0.3) +
geom_smooth(method = "lm", se=F, size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#7fe3d6",
"I-676" = "#eba075")) +
labs(title = "Property Price (log price per TA; under $1M) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
Notably, I’m exploring the relationship between housing price and distance to highways, distance to parks, distance to commercial areas, distance to schools.
For now, I’m using the study area-related data that Luming and Sijia compiled